# Before: rename the coordinate columns x/y to x.cell/y.cell, then left-join
# ave_before_jaccard on those two columns (every row of the left table is kept).
before_res_matrix <- left_join(
  rename(before_res_matrix, x.cell = "x", y.cell = "y"),
  ave_before_jaccard,
  by = c("x.cell", "y.cell")
)
# After: same rename and join against ave_after_jaccard.
after_res_matrix <- left_join(
  rename(after_res_matrix, x.cell = "x", y.cell = "y"),
  ave_after_jaccard,
  by = c("x.cell", "y.cell")
)
# After-2: same rename and join against ave_after_jaccard2.
after_res_matrix2 <- left_join(
  rename(after_res_matrix2, x.cell = "x", y.cell = "y"),
  ave_after_jaccard2,
  by = c("x.cell", "y.cell")
)
# Bin by distance between cells (GCD in km's).
# bin_gcd() assigns each distance to a 2000-wide bin between 0 and 20000 and
# labels the bin by its lower edge: "0" is [0, 2000], "2000" is (2000, 4000],
# ..., "18000" is (18000, 20000]. Distances outside 0-20000 (and NA) become NA.
# The labels are derived from the breaks so the two cannot drift apart.
bin_gcd <- function(gcd) {
  bin_edges <- seq(0L, 20000L, by = 2000L)
  cut(
    gcd,
    breaks = bin_edges,
    # Drop the last edge: each bin is named after where it starts.
    labels = as.character(bin_edges[-length(bin_edges)]),
    include.lowest = TRUE
  )
}
before_res_matrix$cutdist <- bin_gcd(before_res_matrix$gcd)
after_res_matrix$cutdist <- bin_gcd(after_res_matrix$gcd)
after_res_matrix2$cutdist <- bin_gcd(after_res_matrix2$gcd)
# Before: per-distance-bin summary of avg_jaccard.
# Result columns: cutdist, avg, sdev, n, se, first, second, label, ci.
sumRes_01 <-
  before_res_matrix |>
  group_by(cutdist) |>
  summarise(
    # Jaccard
    avg = mean(avg_jaccard, na.rm = TRUE),
    sdev = sd(avg_jaccard, na.rm = TRUE),
    # NOTE(review): n() counts every row in the bin, including rows where
    # avg_jaccard is NA, so se/ci are only right if there are no NAs -- confirm.
    n = n(),
    se = sdev / sqrt(n),
    # Quantiles
    # NOTE(review): 0.25 paired with 0.975 is asymmetric -- confirm that 0.025
    # was not intended.
    first = quantile(avg_jaccard, probs = 0.25, na.rm = TRUE),
    second = quantile(avg_jaccard, probs = 0.975, na.rm = TRUE)
  ) |>
  mutate(label = "Before", label = as.factor(label)) |>
  mutate(
    cutdist = factor(
      cutdist,
      levels = c("0", "2000", "4000", "6000", "8000", "10000",
                 "12000", "14000", "16000", "18000", "20000")
    )
  ) |>
  # Half-width of the 95% t-interval. A bin holding a single row has NA sdev/se
  # and zero degrees of freedom, for which qt() warns and returns NaN. Clamping
  # the df to 1 avoids that warning (ci stays NA because se is NA) without
  # hiding every other warning the pipeline might raise.
  mutate(ci = se * qt(.975, pmax(n - 1, 1))) |>
  as.data.frame()
# After: per-distance-bin summary of avg_jaccard.
# Result columns: cutdist, avg, sdev, n, se, first, second, label, ci.
sumRes_02 <-
  after_res_matrix |>
  group_by(cutdist) |>
  summarise(
    # Jaccard
    avg = mean(avg_jaccard, na.rm = TRUE),
    sdev = sd(avg_jaccard, na.rm = TRUE),
    # NOTE(review): n() counts every row in the bin, including rows where
    # avg_jaccard is NA, so se/ci are only right if there are no NAs -- confirm.
    n = n(),
    se = sdev / sqrt(n),
    # Quantiles. na.rm = TRUE matches mean()/sd() above; without it quantile()
    # stops with an error as soon as one avg_jaccard in the bin is NA.
    # NOTE(review): 0.25 paired with 0.975 is asymmetric -- confirm that 0.025
    # was not intended.
    first = quantile(avg_jaccard, probs = 0.25, na.rm = TRUE),
    second = quantile(avg_jaccard, probs = 0.975, na.rm = TRUE)
  ) |>
  mutate(label = "After", label = as.factor(label)) |>
  mutate(
    cutdist = factor(
      cutdist,
      levels = c("0", "2000", "4000", "6000", "8000", "10000",
                 "12000", "14000", "16000", "18000", "20000")
    )
  ) |>
  # Half-width of the 95% t-interval. A bin holding a single row has NA sdev/se
  # and zero degrees of freedom, for which qt() warns and returns NaN. Clamping
  # the df to 1 avoids that warning (ci stays NA because se is NA) without
  # hiding every other warning the pipeline might raise.
  mutate(ci = se * qt(.975, pmax(n - 1, 1))) |>
  as.data.frame()
# After-2: per-distance-bin summary of avg_jaccard.
# Result columns: cutdist, avg, sdev, n, se, first, second, label, ci.
sumRes_025 <-
  after_res_matrix2 |>
  group_by(cutdist) |>
  summarise(
    # Jaccard
    avg = mean(avg_jaccard, na.rm = TRUE),
    sdev = sd(avg_jaccard, na.rm = TRUE),
    # NOTE(review): n() counts every row in the bin, including rows where
    # avg_jaccard is NA, so se/ci are only right if there are no NAs -- confirm.
    n = n(),
    se = sdev / sqrt(n),
    # Quantiles. na.rm = TRUE matches mean()/sd() above; without it quantile()
    # stops with an error as soon as one avg_jaccard in the bin is NA.
    # The upper quantile is 0.975 (was 0.95) so this stage's error bars are
    # comparable with the Before and After summaries it is plotted against.
    # NOTE(review): 0.25 paired with 0.975 is asymmetric -- confirm that 0.025
    # was not intended.
    first = quantile(avg_jaccard, probs = 0.25, na.rm = TRUE),
    second = quantile(avg_jaccard, probs = 0.975, na.rm = TRUE)
  ) |>
  mutate(label = "After-2", label = as.factor(label)) |>
  mutate(
    cutdist = factor(
      cutdist,
      levels = c("0", "2000", "4000", "6000", "8000", "10000",
                 "12000", "14000", "16000", "18000", "20000")
    )
  ) |>
  # Half-width of the 95% t-interval (qt(.975), was qt(.95), to match the other
  # stages). A bin holding a single row has NA sdev/se and zero degrees of
  # freedom, for which qt() warns and returns NaN. Clamping the df to 1 avoids
  # that warning (ci stays NA because se is NA) without hiding every other
  # warning the pipeline might raise.
  mutate(ci = se * qt(.975, pmax(n - 1, 1))) |>
  as.data.frame()
# Stack the three per-stage summaries (Before, After, After-2) row-wise.
sumRes_03 <-
  sumRes_01 |>
  bind_rows(sumRes_02, sumRes_025)
# Plot avg per distance bin, one dodged line per stage, with error bars running
# from the `first` to the `second` quantile column and point size mapped to n.
sumRes_03 |>
  arrange(label) |>
  # Fix the stage order so the manual colours map to Before, After, After-2.
  mutate(label = factor(label, levels = c("Before", "After", "After-2"))) |>
  ggplot(aes(x = cutdist, y = avg, group = label, colour = label, fill = label)) +
  # ymin takes the lower quantile and ymax the upper one (they were swapped).
  geom_errorbar(
    aes(ymin = first, ymax = second),
    width = 0.05, linewidth = 1, position = position_dodge(width = 0.3)
  ) +
  geom_line(linewidth = 1.2, position = position_dodge(width = 0.3)) +
  # NOTE(review): ylim() drops values outside 0-0.7 (error bars included);
  # coord_cartesian(ylim = ...) would only zoom -- confirm which is wanted.
  ylim(0, 0.7) +
  scale_size_continuous(breaks = c(5, 10, 15, 20, 25, 30)) +
  # Hollow points: the fixed white fill overrides the fill mapping above.
  geom_point(
    aes(size = n),
    shape = 21, fill = "white", stroke = 2,
    position = position_dodge(width = 0.3)
  ) +
  scale_fill_manual(values = c("black", "darkorange4", "goldenrod3")) +
  scale_color_manual(values = c("black", "darkorange4", "goldenrod3")) +
  labs(
    x = "Great Circle Distance (km)",
    y = "Similarity Value",
    title = "Jaccard",
    subtitle = "Subsampling by cells and occurrences",
    colour = "Stages",
    size = "Cell-pair comparison"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    legend.title = element_text(face = "bold"),
    aspect.ratio = 1
  )